#########
## Nature Human Behaviour 
## Leading Countries in Global Science Increasingly Receive More Citations than Other Countries Despite Doing Similar Research.
## https://doi.org/10.1038/s41562-022-01351-5
## Harvard Dataverse (Code and Metadata): https://doi.org/10.7910/DVN/WCOINR 
## Step 2
## Data: Data_20210905
#########

# At a terminal, run: 
# '''Note: '$1' is the discipline ID that you pass in.'''
# '''Note: the second parameter is fed in as either 'english_only' or 'all' '''
# '''where 'english_only' are the main results and 'all' are the SI results. '''
# ml python/3.6.1
# ml py-ipython/6.1.0_py36 python/3.6.1 py-scipy/1.1.0_py36 py-scikit-learn/0.19.1_py36 py-pandas/0.23.0_py36 gcc/10.1.0 py-pytorch/1.4.0_py36
# ml py-numpy/1.17.2_py36
# ml R
# export PYTHONPATH=$GROUP_HOME/python/lib/python3.6/site-packages:$PYTHONPATH
# srun python3 -u Step_X2_Python3_RR_MAG_KLD_MRQAP_and_Distortion_Network.py "$1" "english_only"

#############################
### Inputs
#############################

# discipline = '147176958'
# language = "english_only"
import sys
discipline = str(sys.argv[1])
language = str(sys.argv[2]) # "all" or "english_only"
#sys.path.insert(0, '.')

#############################
### Time Start
#############################
import time 
start_time = time.time()

#############################
### Modules
#############################
import sys
from collections import Counter
import random
import bz2 
import pickle
import pandas as pd
import numpy as np
import gc
import itertools
import os 
import pickle as pickle
import networkx as nx

from Python_Class_LLDA import LLDA
import Python_Class_MRQAP as MRQAP 

import pyper as pr

# Mulitprocessing
from multiprocessing.dummy import Pool as ThreadPool
import multiprocessing

#############################
### Functions
#############################

def reduce_mem_usage(df):
    """ 
    iterate through all the columns of a dataframe and 
    modify the data type to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
   
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max <\
                  np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max <\
                   np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max <\
                   np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max <\
                   np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max <\
                   np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max <\
                   np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            next    
    return df

def YearlyKLDEdgelist(year_):
	print("Year: "+str(year_))	
	try:

		# Source: https://stackoverflow.com/questions/24913425/pythonfast-efficient-implementation-of-the-kullback-leibler-divergence-for-mult
		def KLD_Matrix(phi_df_input):
			"""
			Finds the pairwise Kullback-Leibler divergence
			matrix between all rows in X.

			Parameters
			----------
			X : array_like, shape (n_samples, n_features)
			    Array of probability data. Each row must sum to 1.

			Returns
			-------
			D : ndarray, shape (n_samples, n_samples)
			    The Kullback-Leibler divergence matrix. A pairwise matrix D such that D_{i, j}
			    is the divergence between the ith and jth vectors of the given matrix X.

			Notes
			-----
			Based on code from Gordon J. Berman et al.
			(https://github.com/gordonberman/MotionMapper)

			References:
			-----------
			Berman, G. J., Choi, D. M., Bialek, W., & Shaevitz, J. W. (2014). 
			Mapping the stereotyped behaviour of freely moving fruit flies. 
			Journal of The Royal Society Interface, 11(99), 20140672.
			"""
			matrix_ = np.array(phi_df_input.T)
			matrix_[matrix_==matrix_.min()] = 1e-16
			matrix_ = (matrix_.T/ matrix_.sum(axis=1)).T
			
			matrix_log = np.log(matrix_)
			entropies = -np.sum(matrix_ * matrix_log, axis=1)
			D = np.matmul(-matrix_, matrix_log.T)
			D = D - entropies
			D = D / np.log(2)
			D *= (1 - np.eye(D.shape[0]))
			
			return D

		KLD_edgelist = pd.DataFrame(KLD_Matrix(phi_dictionary[year_]))
		KLD_edgelist.columns = phi_dictionary[year_].columns
		KLD_edgelist.index = KLD_edgelist.columns
		KLD_edgelist = KLD_edgelist.unstack().reset_index().rename(columns={"level_0":"Sender","level_1":"Receiver",0:"KLD"}).query("Sender!=Receiver")
		KLD_edgelist["Vocas_Size"] = phi_dictionary[year_].shape[0]

		KLD_edgelist["Year"] = year_
		KLD_edgelist["Discipline"] = discipline

		return KLD_edgelist

	except:
		return pd.DataFrame()

def stars(p_values):
	star_value = ""
	if p_values <= 0.05 and p_values>0.01:
		star_value = '*'
	if p_values <= 0.01:
		star_value = '**'
	return star_value

def mean_center_standard(df):
	mask = np.ones((df.shape[0],df.shape[0]))
	mask = (mask - np.diag(np.ones(df.shape[0]))).astype(np.bool)
	return np.reshape((df -  np.nanmean(df[mask]))/ np.nanstd(df[mask]),(df.shape[0],df.shape[0]))

def MRQAP_Model_Statistics_Mean_and_SD_Centered(Y_Input,X_Variables_Input,Variable_List_Input,y_type,model_type,year_):
	#####
	##
	######
	r = pr.R(use_pandas=True)
	r("library('asnipe')")
	r.assign("Y", mean_center_standard(Y_Input))
	r("diag(Y)<-0")
	
	#####
	##
	######
	r.assign("number_of_variables",len(Variable_List_Input))
	r("X_Dyads<-array(dim=c(dim(Y)[1],dim(Y)[2],number_of_variables))")
	for i, x_ in enumerate(Variable_List_Input):
		#print i, x_
		r.assign("i",i+1) # R starts at 1 not zero.
		X_Variables_Input[Variable_List_Input[i]][np.isnan(X_Variables_Input[Variable_List_Input[i]])] = 0 #Remove NAs
		
		# Mean Center and Standardize Continuous variables
		r.assign("X_Variables",mean_center_standard(X_Variables_Input[Variable_List_Input[i]]))
		r("diag(X_Variables)<-0")
		r("X_Dyads[,,i]<-X_Variables")
	#####
	##
	#####
	number_of_variables_for_model = len(Variable_List_Input)
	string_variables = ""
	for i_ in range(1,number_of_variables_for_model+1):
		if i_>1:
			string_variables = string_variables + "+" + "X_Dyads[,,"+str(i_)+"]"
		else:
			string_variables = "Y~X_Dyads[,,"+str(i_)+"]"
	#####
	##
	######
		model_string = '''
			mrqap_model_ <- mrqap.dsp(
				PASTE_MODEL_HERE, 
				intercept = TRUE, 
				directed = "directed",
				diagonal = FALSE, 
				test.statistic = "t-value",
				tol = 1e-07, 
				randomisations = 1000)
			'''
	# if "Cosine" not in y_type or "Cosine_Past"==y_type:
	# 	model_string = '''
	# 		mrqap_model_ <- mrqap.dsp(
	# 			PASTE_MODEL_HERE, 
	# 			intercept = TRUE, 
	# 			directed = "directed",
	# 			diagonal = FALSE, 
	# 			test.statistic = "t-value",
	# 			tol = 1e-07, 
	# 			randomisations = 1000)
	# 		'''
	# else:
	# 	model_string = '''
	# 	mrqap_model_ <- mrqap.dsp(
	# 		PASTE_MODEL_HERE, 
	# 		intercept = TRUE, 
	# 		directed = "undirected",
	# 		diagonal = FALSE, 
	# 		test.statistic = "t-value",
	# 		tol = 1e-07, 
	# 		randomisations = 1000)
	# 	'''	

	model_string = str.replace(model_string,"PASTE_MODEL_HERE",string_variables)
	r(model_string)
	#####
	##
	######
	r('''all_output <- capture.output(print(mrqap_model_))''')
	all_output = r.get('''all_output''')
	#####
	##
	######
	pydata = r.get("mrqap_model_")
	model_df_Dekker_ = pd.DataFrame({"Coefficients":["Intercept"]+Variable_List_Input,
							"Test_Statistics":pydata["test.statistic"],
							"Beta":pydata["coefficients"],
							"P_Values":pydata["P.values"],
							"P_Values_Lesser":pydata["P.lesser"],
							"P_Values_Greater":pydata["P.greater"]})
	#####
	##
	######
	model_df_Dekker_["AIC"] = pydata["AIC"]
	model_df_Dekker_["N"] = pydata["n"]
	model_df_Dekker_["DF_Residual"] = pydata["df.residual"]
	model_df_Dekker_["Rank"] = pydata["rank"]
	#####
	##
	######
	model_df_Dekker_["Multiple_RSquare"] = all_output[-3].split("\t")[0].split(": ")[1].rstrip()
	model_df_Dekker_["Adjusted_RSquare"] = all_output[-3].split("\t")[1].split(": ")[1].rstrip()
	model_df_Dekker_["F_Statistic"] = all_output[-4].split(": ")[1].split(" on ")[0]
	#####
	##
	######
	model_df_Dekker_["Year"] = year_
	model_df_Dekker_["Discipline"] = discipline
	model_df_Dekker_["Stars"] = model_df_Dekker_["P_Values"].apply(lambda x: stars(x))
	#####
	##
	######
	model_df_Dekker_["Y"] = y_type
	model_df_Dekker_["Model_Type"] = model_type
	#####
	##
	#####
	return model_df_Dekker_

def MRQAP_KLD_Results(year_):
	try:
		X_Variables = {}
		Y = {}

		# Constraints
		# Citations Sum > 0 Dyad
		# Number of Papers Produced by a Country > 1
		minimum_number_of_papers = 1

		print(str(year_)+" "+str(COUNTRY_INCLUSION)+" "+str(journal_censored)+" "+str(citation_type))

		#######
		## Edgelists
		######
		KLD_df_year = KLD_edgelist_df.query("Year==@year_")		

		nodes_list_year = num_papers_df.query("Year==@year_").query("Number_of_Papers>@minimum_number_of_papers").Nation.tolist()

		core_countries = ["UNITED_STATES","UNITED_KINGDOM","GERMANY","CANADA","AUSTRALIA","NEW_ZEALAND","ITALY","SPAIN","FINLAND","NORWAY","SWEDEN","AUSTRIA","PORTUGAL","FRANCE","NETHERLANDS","DENMARK","JAPAN","SOUTH_KOREA","SWITZERLAND","BELGIUM","IRELAND","CHINA","SINGAPORE"]

		if COUNTRY_INCLUSION=="Core":
			nodes_list_year = [x for x in core_countries if str(x)!='nan']
		elif COUNTRY_INCLUSION=="All_wo_Core":
			nodes_list_year = [x for x in nodes_list_year if str(x)!='nan' and x not in core_countries]
		elif COUNTRY_INCLUSION=="All":
			nodes_list_year = [x for x in nodes_list_year if str(x)!='nan']
		
		nodes_i = pd.DataFrame({"i":nodes_list_year})
		nodes_i.index = [0]*len(nodes_i)
		nodes_j = pd.DataFrame({"j":nodes_list_year})
		nodes_j.index = [0]*len(nodes_j)
		nodes_df = pd.merge(nodes_i,nodes_j,right_index=True,left_index=True).reset_index(drop=True)
		nodes_df["Dyad"] = np.where(nodes_df["i"].astype(str)>nodes_df["j"].astype(str),nodes_df["i"].astype(str)+"+"+nodes_df["j"].astype(str),nodes_df["j"].astype(str)+"+"+nodes_df["i"].astype(str))
		nodes_df = nodes_df.query("i!=j")

		############
		## KLD
		############
		KLD_df_year = pd.merge(nodes_df,KLD_df_year[["KLD_CenterNormed","Sender","Receiver"]].rename(columns={"Sender":"i","Receiver":"j"}),left_on=["i","j"],right_on=["i","j"],how="left").drop(columns="Dyad").fillna(0)
		H = nx.from_pandas_edgelist(KLD_df_year,'i','j','KLD_CenterNormed',create_using=nx.DiGraph())
		matrix_KLD = pd.DataFrame(np.array(nx.adjacency_matrix(H,weight="KLD_CenterNormed").todense()))
		np.fill_diagonal(matrix_KLD.values,0)

		Y["KLD_CenterNormed"] = np.array(matrix_KLD)
		
		######
		## Citation
		######
		# j to i (j-Receiver to i-Sender)
		citation_df_year = citation_df.query("Year==@year_").rename(columns={"Sender":"i","Receiver":"j"})[["i","j","Citations_ji"]].fillna(0)
		citation_df_year = pd.merge(nodes_df,citation_df_year[["i","j","Citations_ji"]],on=["i","j"],how="left")
		citation_df_year = citation_df_year.fillna(0)
		G_Citations_ji = nx.from_pandas_edgelist(citation_df_year,'i','j','Citations_ji',create_using=nx.DiGraph())
		matrix_ji_citation = pd.DataFrame(np.array(nx.adjacency_matrix(G_Citations_ji,weight='Citations_ji').todense()))
		np.fill_diagonal(matrix_ji_citation.values,0)
		X_Variables["Citations_ji"] = np.array(matrix_ji_citation)

		######
		## Number of Papers Sender
		######
		number_of_papers_df_year_i_j = num_papers_df.query("Year==@year_").rename(columns={"Nation":"i"})[["i","Number_of_Papers"]].fillna(0)

		number_of_papers_df_year = pd.merge(nodes_df,number_of_papers_df_year_i_j.rename(columns={"Number_of_Papers":"Number_of_Sender_Papers"})[["i","Number_of_Sender_Papers"]],on=["i"],how="left")
		number_of_papers_df_year = pd.merge(number_of_papers_df_year,number_of_papers_df_year_i_j.rename(columns={"i":"j","Number_of_Papers":"Number_of_Receiver_Papers"})[["j","Number_of_Receiver_Papers"]],on=["j"],how="left")

		number_of_papers_df_year["Number_of_Max_Papers"] = number_of_papers_df_year[["Number_of_Sender_Papers", "Number_of_Receiver_Papers"]].max(axis=1)
		number_of_papers_df_year["Number_of_Papers_Difference_ij"] = number_of_papers_df_year["Number_of_Sender_Papers"] - number_of_papers_df_year["Number_of_Receiver_Papers"]

		number_of_papers_df_year = number_of_papers_df_year.drop(columns="Number_of_Sender_Papers").drop(columns="Number_of_Receiver_Papers")
		number_of_papers_df_year = number_of_papers_df_year.fillna(0)

		G_NumPapersMax_ij = nx.from_pandas_edgelist(number_of_papers_df_year,'i','j','Number_of_Max_Papers',create_using=nx.DiGraph())
		matrix_ij_number_of_papers = pd.DataFrame(np.array(nx.adjacency_matrix(G_NumPapersMax_ij,weight='Number_of_Max_Papers').todense()))
		np.fill_diagonal(matrix_ij_number_of_papers.values,0)
		X_Variables["Number_of_Max_Papers"] = np.array(matrix_ij_number_of_papers)

		G_NumPapersDiff_ij = nx.from_pandas_edgelist(number_of_papers_df_year,'i','j','Number_of_Papers_Difference_ij',create_using=nx.DiGraph())
		matrix_ij_number_of_papers = pd.DataFrame(np.array(nx.adjacency_matrix(G_NumPapersDiff_ij,weight='Number_of_Papers_Difference_ij').todense()))
		np.fill_diagonal(matrix_ij_number_of_papers.values,0)
		X_Variables["Number_of_Papers_Difference_ij"] = np.array(matrix_ij_number_of_papers)

		###############
		# MRQAP Models
		###############

		all_variables_model = ["Citations_ji"]
		model_df_KLD_ji = MRQAP_Model_Statistics_Mean_and_SD_Centered(Y["KLD_CenterNormed"],X_Variables,all_variables_model,"KLD_CenterNormed","Citations_ji" ,year_)

		all_variables_model = ["Citations_ji","Number_of_Papers_Difference_ij"]
		model_df_KLD_ji_NumPapersDiffij = MRQAP_Model_Statistics_Mean_and_SD_Centered(Y["KLD_CenterNormed"],X_Variables,all_variables_model,"KLD_CenterNormed","Citations_ji_and_NumDiffijPapers" ,year_)

		model_df_KLD = model_df_KLD_ji.append(model_df_KLD_ji_NumPapersDiffij)

		return model_df_KLD

	except:
		print("Error | Citation "+str(year_))
		return pd.DataFrame()

def normalize_one(column_):
	return (column_ - column_.min())/(column_.max()-column_.min())

#############################
## Number of Papers per Country
#############################

#Number_of_Papers_per_Label_df = pd.read_csv("/MAG_Field_Data/MetaData/INPUT_Number_of_Papers_per_Field.csv.bz2")

#############################
## GRID Database with GRID
#############################

df_grid_database = pd.read_csv("addresses.csv")

grid_df = df_grid_database[["grid_id","country"]].rename(columns={'country':"Country"})
grid_df["Country"] = grid_df["Country"].str.lstrip().str.rstrip().str.upper().str.replace(" ","_")
grid_df = grid_df.dropna()

grid_df["Country"] = grid_df["Country"].replace(to_replace={'BAHAMAS_THE':'BAHAMAS',"KOREA_DEM_REP":"NORTH_KOREA","DEMOCRATIC_REPUBLIC_OF_THE_CONGO":"CONGO","SAINT_":"ST_","MACAO_SAR_PEOPLES_R_CHINA":"MACAO","CZECHIA":"CZECH_REPUBLIC"})

#############################
## Independent Variables
#############################
Field_Data_Filename = "OUTPUT_Python_MAG_Field_MetaData_Dict_"+str(discipline)+".pbz2"
f = bz2.BZ2File(Field_Data_Filename, 'rb')
dict_grid_labels = pickle.load(f)
dict_year = pickle.load(f)
dict_coauthor_edgelist = pickle.load(f)
dict_mobility_edgelist = pickle.load(f)
dict_citation_edgelist = pickle.load(f)
dict_citation_year_edgelist = pickle.load(f)

grid_labels_df = pd.DataFrame(dict_grid_labels)
year_df_input = pd.DataFrame(dict_year)
#coauthor_df_input = pd.DataFrame(dict_coauthor_edgelist)
#mobility_df_input = pd.DataFrame(dict_mobility_edgelist)
#citation_df_input = pd.DataFrame(dict_citation_edgelist).rename(columns={"citationgridid":"citing_gridid","citations":"numauthor"})

del dict_grid_labels
del dict_year
del dict_coauthor_edgelist
del dict_mobility_edgelist
del dict_citation_edgelist
del dict_citation_year_edgelist
gc.collect()


#############################
### Read in Files for Fields
#############################
# International KLD Similarity

if language=="english_only":
	model_df_MRQAP_KLD_Filename = "OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".csv.gz"
	KLD_citation_df_Filename = "OUTPUT_Python_Field_KLD_Citation_Edgelist_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".csv.gz"
	Delta_df_Filename = "OUTPUT_Python_Field_Delta_Degree_Centrality_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".csv.gz"
	Degree_df_Filename = "OUTPUT_Python_Field_Degree_Centrality_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".csv.gz"

else:
	model_df_MRQAP_KLD_Filename = "OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".csv.gz"
	KLD_citation_df_Filename = "OUTPUT_Python_Field_KLD_Citation_Edgelist_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".csv.gz"
	Delta_df_Filename = "OUTPUT_Python_Field_Delta_Degree_Centrality_KLD_Citation_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".csv.gz"
	Degree_df_Filename = "OUTPUT_Python_Field_Degree_Centrality_KLD_Citation_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".csv.gz"

#############################
### Calculate Metrics by Coherence Cutoff
#############################

model_df_Beta_KLD_Final = pd.DataFrame()
KLD_citation_df_Final = pd.DataFrame()
Delta_df_Final = pd.DataFrame()
Degree_df_Final = pd.DataFrame()

for journal_censored in ["No_Censoring","Since_1980"]:

	print("-----------------------")
	print(journal_censored)
	print("-----------------------")

	if journal_censored == "Since_1980":
		journal_censored_filename = "_Journal_Censored_Since_1980_"
	elif journal_censored == "Since_2000":
		journal_censored_filename = "_Journal_Censored_Since_2000_"
	else:
		journal_censored_filename = "_"


	#############################
	## RR | Read in Citations
	#############################
	# Citations | j - Receiver (who received citation is sending information) i - Sender (who is citing receives information)

	if journal_censored == "No_Censoring":
		citation_df_input = pd.read_csv("OUTPUT_Python_MAG_RR_Citation_Country_by_Year_"+str(discipline)+".csv.gz")
		citation_df_input = citation_df_input.rename(columns={"Country_Citing":"Sender","Country_Cited":"Receiver"})
	if journal_censored == "Since_1980":
		citation_df_input = pd.read_csv("OUTPUT_Python_MAG_RR_Journal_Censored_Citation_Country_by_Year_"+str(discipline)+".csv.gz")
		citation_df_input = citation_df_input.rename(columns={"Country_Citing":"Sender","Country_Cited":"Receiver"})	

	#############################
	### Create Filenames for KLD Edgelist  
	#############################

	if language=="english_only":
		NLLDA_Dict_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"Yearly_NLLDA_Dict_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".pbz2"
		Coherence_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"Yearly_NLLDA_Topic_Coherence_Scores_EnglishOnly_"+str(discipline)+".pbz2"
		KLD_Edgelist_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"International_KLD_Edgelist_Corpus_RAKE_and_GoogleAPI_EnglishOnly_"+str(discipline)+".csv.gz"
	if language=="all":
		NLLDA_Dict_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"Yearly_NLLDA_Dict_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".pbz2"
		Coherence_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"Yearly_NLLDA_Topic_Coherence_Scores_All_"+str(discipline)+".pbz2"
		KLD_Edgelist_Filename = "OUTPUT_Python_MAG"+str(journal_censored_filename)+"International_KLD_Edgelist_Corpus_RAKE_and_GoogleAPI_All_"+str(discipline)+".csv.gz"

	###################################
	##### Load in NLLDA Dictionary | Beta = 0.1
	###################################
	if journal_censored=="No_Censoring":
		NLLDA_Min_Year = 1950 #The Earliest Year of the NLLDA
	else:
		NLLDA_Min_Year = 1980 #The Earliest Year of the NLLDA

	NLLDA_Max_Year = 2017 #The Last Year

	'''
	What It Does: Read in NLLDA Files as a pickle
	pickle file saved as a dictionary of NLLDA Models
	So, Load in that way. 
	'''
	f = bz2.BZ2File(NLLDA_Dict_Filename, 'rb')
	Dictionary_of_NLLDA = {}
	# "Betas" are NLLDA hyperparameters
	# Here, we're only considering Beta = 0.1
	for beta in [0.1,0.5,0.9]:
		if beta == 0.1:
			Dictionary_of_NLLDA = {}
			for year in range(NLLDA_Min_Year,NLLDA_Max_Year+1):
				try:
					print("beta "+str(beta)+" year "+str(year))
					Dictionary_of_NLLDA[year] = pickle.load(f)
				except:
					Dictionary_of_NLLDA[year] = {}

	#############################
	## Number of Papers per Country and Percent of Papers 
	#############################
	if journal_censored!="No_Censoring":
		num_papers_df = pd.DataFrame()
		for year in range(NLLDA_Min_Year,NLLDA_Max_Year+1):
			try:
				labelmap_ = Dictionary_of_NLLDA[year].labelmap
				labelmap_ = {v:k for k, v in labelmap_.items()}
				label_paper_count_year_ = pd.DataFrame(Dictionary_of_NLLDA[year].n_m_z).rename(columns=labelmap_).sum().reset_index().rename(columns={'index':'Nation',0:'Number_of_Papers'})
				label_paper_count_year_["Year"] = year
				label_paper_count_year_ = label_paper_count_year_.query("Nation!='common'")
				num_papers_df = num_papers_df.append(label_paper_count_year_)
			except:
				print("Error")

	else:
		num_papers_df = pd.merge(grid_labels_df.drop_duplicates(),year_df_input,on=["paperid"],how="left")
		num_papers_df = pd.merge(num_papers_df,grid_df.rename(columns={"grid_id":"gridid"}),on=["gridid"],how="left").rename(columns={"year":"Year"})
		num_papers_df = num_papers_df.groupby(["Country","Year"]).agg({"paperid":"count"}).reset_index().rename(columns={"Country":"Nation","paperid":"Number_of_Papers"})

	# c = num_papers_df.groupby(['Year'])['Number_of_Papers'].sum().rename("count").reset_index()
	# percent_papers_df = pd.merge(num_papers_df,c,on=["Year"])
	# percent_papers_df["Percent_of_Papers"] = percent_papers_df["Number_of_Papers"]/percent_papers_df['count']
	# percent_papers_df = percent_papers_df[["Nation","Year","Percent_of_Papers"]]

	# UPDATE - Dyad Citation Deflation (Baseline 2000)
	Dyad_Citation_Deflation_df = pd.merge(num_papers_df.set_index(pd.Index([0]*num_papers_df.shape[0])),num_papers_df.query("Year==2000").set_index(pd.Index([0]*num_papers_df.query("Year==2000").shape[0])),suffixes=("","_2000"),left_index=True,right_index=True).query("Nation==Nation_2000")
	Dyad_Citation_Deflation_df["Deflator"] = (Dyad_Citation_Deflation_df["Number_of_Papers_2000"])/(Dyad_Citation_Deflation_df["Number_of_Papers"])
	Dyad_Citation_Deflation_df = Dyad_Citation_Deflation_df[["Nation","Year","Deflator"]]

	###################################
	##### Number of Documents in the Field | Citation Deflation 
	###################################

	Number_of_Field_Documents = {}
	for year_ in Dictionary_of_NLLDA.keys():
		Number_of_Field_Documents[year_] = len(Dictionary_of_NLLDA[year_].docs)

	Number_of_Field_Documents_df = pd.DataFrame.from_dict(Number_of_Field_Documents,orient='index').reset_index().rename(columns={0:"Number_of_Field_Documents","index":"Year"})

	# 2021 09 05 - Check os.path for edgelist for jouranl censoring (if statement not working)
	if os.path.exists(KLD_Edgelist_Filename)==False:

		###################################
		##### Create Phi DataFrames for Each Year
		###################################
		'''
		What It Does: Extract Phi Matrices, Country Labels (columns), and Vocas IDs (indices) into Dictionaries
		'''
		phi_dictionary = {}

		# Start in 1980
		for year_ in range(1980,NLLDA_Max_Year+1):
			try:
				print("Phi Year: "+str(year_))

				# Extract necessary parts from NLLDA Dictionary you loaded in
				phi_ = Dictionary_of_NLLDA[year_].phi()
				labelmap_ = Dictionary_of_NLLDA[year_].labelmap
				vocas_ = Dictionary_of_NLLDA[year_].vocas_id

				# Swap label and vocas dictionaries so that they are id number first
				# E.g., US:3 becomes 3:US, where 3 is the fourth column in Phi
				# E.g., Social Capital:34 becomes 34:Social Capital, the 35th row in Phi
				labelmap_ = {v:k for k, v in labelmap_.items()}
				vocas_ = {v:k for k, v in vocas_.items()}

				### Create Phi DF where columns have country labels and rows have the vocabulary.
				### Raw | Remove lowest value to zero for 'raw' 
				### Normed | Remove lowest value and then renormalize sum to 
				### 100% then renormalize to sum.
				### *********************************
				### Sanity Check | Compare column by column Term-to-Label presence matrix 
				### NLLDA[year_].n_z_t with normalied phi_df_normed[label][phi_df_normed[label]>0]
				### Once the lowest value is set to 0, the lengths should match. 
				### Indicating that lowest value in non-normed phi_df reflects non-present terms.
				# Raw
				phi_df = pd.merge(pd.DataFrame.from_dict(vocas_,orient='index'),pd.DataFrame(phi_.T).rename(columns=labelmap_),left_index=True,right_index=True).set_index(0)
				phi_df = phi_df.apply(lambda x: x.replace(np.min(x),0),axis=0)
				# Normed

				# Collect and Save to a Dictionary
				phi_dictionary[year_] = phi_df

				del phi_df
				gc.collect()

			except:
				phi_dictionary[year_] = pd.DataFrame()


		#############################
		### Get KLD and Output Results
		#############################

		# Pass in beta_ as a global variable
		pool = ThreadPool(multiprocessing.cpu_count()-1)  # Number of threads
		KLD_edgelist_df_FULL = pd.concat(pool.map(YearlyKLDEdgelist, range(NLLDA_Min_Year,2017+1)))
		pool.close()

		#### Print to CSV
		KLD_edgelist_df_FULL.to_csv(KLD_Edgelist_Filename,index=False,compression="gzip")

	else:
		KLD_edgelist_df_FULL = pd.read_csv(KLD_Edgelist_Filename)

		try: 
			del KLD_edgelist_df_FULL["Discipline"]
		except:
			print("Discipline not present in KLD Edgelist")

	##############################
	#### Topic Coherence Scores
	##############################
	f = bz2.BZ2File(Coherence_Filename, 'rb')
	Topic_Coherence_Dict = pickle.load(f)
	Topic_Coherence_df = pd.DataFrame.from_dict(Topic_Coherence_Dict,orient='index').unstack().reset_index().rename(columns={"level_0":"Nation","level_1":"Year",0:"Topic_Coherence_Percentile"})

	Topic_Coherence_df["Topic_Coherence_Percentile"] = Topic_Coherence_df.groupby(["Year"]).apply(lambda x: 1-x.rank(pct=True))["Topic_Coherence_Percentile"]

	Topic_Coherence_df = Topic_Coherence_df.query("Nation!='common'")

	# Citation Window (removed citation window 2 and 5)
	citation_window = 5
	for citation_type in ["Field_Deflated","Dyad_Deflated","No_Censoring"]:
		if citation_type=="Field_Deflated":
			# Deflated DF | Year 2000 Baseline
			deflated_df = Number_of_Field_Documents_df.copy()
			deflated_df["Deflator"] = pd.DataFrame({"Deflator":Number_of_Field_Documents_df.query("Year==2000")["Number_of_Field_Documents"].values[0]/Number_of_Field_Documents_df["Number_of_Field_Documents"].values.tolist()}).replace([np.inf, -np.inf], np.nan)
			deflated_df = deflated_df.drop(columns=['Number_of_Field_Documents'])
			citation_df = pd.merge(citation_df_input.query("Citing_Year-Cited_Year<=@citation_window"),deflated_df.rename(columns={"Year":"Citing_Year"}),on=["Citing_Year"],how="left")
			citation_df["Number_of_Cites"] = citation_df["Number_of_Cites"]*citation_df["Deflator"]
			citation_df = citation_df.drop(columns=["Deflator"])
			citation_df = citation_df.drop(columns=["Citing_Year"]).rename(columns={"Cited_Year":"Year","Number_of_Cites":"Citations_ji"}).groupby(["Discipline","Sender","Receiver","Year"]).agg({"Citations_ji":"sum"}).reset_index()

		if citation_type=="Dyad_Deflated":
			# Deflated DF | Year 2000 Baseline
			citation_df = pd.merge(citation_df_input.query("Citing_Year-Cited_Year<=@citation_window"),Dyad_Citation_Deflation_df.rename(columns={"Year":"Cited_Year","Nation":"Receiver","Deflator":"Deflator_Receiver"}),on=["Cited_Year","Receiver"],how="left")
			citation_df = pd.merge(citation_df.query("Citing_Year-Cited_Year<=@citation_window"),Dyad_Citation_Deflation_df.rename(columns={"Year":"Citing_Year","Nation":"Sender","Deflator":"Deflator_Sender"}),on=["Citing_Year","Sender"],how="left")
			citation_df = citation_df.dropna()
			citation_df["Deflator"] = citation_df["Deflator_Sender"]*citation_df["Deflator_Receiver"]
			citation_df["Number_of_Cites"] = citation_df["Number_of_Cites"]*citation_df["Deflator"]
			citation_df = citation_df.drop(columns=["Deflator","Deflator_Sender","Deflator_Receiver"])
			citation_df = citation_df.drop(columns=["Citing_Year"]).rename(columns={"Cited_Year":"Year","Number_of_Cites":"Citations_ji"}).groupby(["Discipline","Sender","Receiver","Year"]).agg({"Citations_ji":"sum"}).reset_index()

		if citation_type=="No_Censoring":
			# Citation (Normal)
			citation_df = citation_df_input.query("Citing_Year-Cited_Year<=@citation_window").drop(columns=["Citing_Year"]).rename(columns={"Cited_Year":"Year","Number_of_Cites":"Citations_ji"}).groupby(["Discipline","Sender","Receiver","Year"]).agg({"Citations_ji":"sum"}).reset_index()

		for percentile_cutoff_ in [0,0.25,0.75]:
			
			print("Coherence Percentile: "+str(percentile_cutoff_))

			KLD_edgelist_df = KLD_edgelist_df_FULL.query("Sender!='common' & Receiver!='common'")
			KLD_edgelist_df = pd.merge(KLD_edgelist_df,Topic_Coherence_df,
				left_on=["Sender","Year"],
				right_on=["Nation","Year"]).query("Topic_Coherence_Percentile>=@percentile_cutoff_").drop(columns="Topic_Coherence_Percentile").drop(columns=["Nation"])
			KLD_edgelist_df = pd.merge(KLD_edgelist_df,Topic_Coherence_df,
				left_on=["Receiver","Year"],
				right_on=["Nation","Year"]).query("Topic_Coherence_Percentile>=@percentile_cutoff_").drop(columns="Topic_Coherence_Percentile").drop(columns=["Nation"])

			KLD_edgelist_df["KLD_CenterNormed"] = KLD_edgelist_df.groupby('Year')[['KLD']].transform(lambda x: ((x.mean()-x)/x.std()))

			if os.path.exists(model_df_MRQAP_KLD_Filename)==False: 
				#################
				###### MRQAP | Citations
				#################

				COUNTRY_INCLUSION = "All"
				pool = ThreadPool(multiprocessing.cpu_count()-1)  # Number of threads
				model_df_Beta_KLD_All = pd.concat(pool.map(MRQAP_KLD_Results, range(KLD_edgelist_df.Year.min(),KLD_edgelist_df.Year.max()+1)))
				pool.close()
				model_df_Beta_KLD_All["Country_Inclusion_Criteria"] = "All"

				COUNTRY_INCLUSION = "All_wo_Core"
				pool = ThreadPool(multiprocessing.cpu_count()-1)  # Number of threads
				model_df_Beta_KLD_All_wo_Core = pd.concat(pool.map(MRQAP_KLD_Results, range(KLD_edgelist_df.Year.min(),KLD_edgelist_df.Year.max()+1)))
				pool.close()
				model_df_Beta_KLD_All_wo_Core["Country_Inclusion_Criteria"] = "All_wo_Core"

				COUNTRY_INCLUSION = "Core"
				pool = ThreadPool(multiprocessing.cpu_count()-1)  # Number of threads
				model_df_Beta_KLD_Core = pd.concat(pool.map(MRQAP_KLD_Results, range(KLD_edgelist_df.Year.min(),KLD_edgelist_df.Year.max()+1)))
				pool.close()
				model_df_Beta_KLD_Core["Country_Inclusion_Criteria"] = "Core"

				model_df_Beta_KLD = model_df_Beta_KLD_All_wo_Core.append(model_df_Beta_KLD_Core).append(model_df_Beta_KLD_All)

				#FINAL BETA DF
				model_df_Beta_KLD["Percentile_Cutoff"] = percentile_cutoff_
				model_df_Beta_KLD["Citation_Window"] = citation_window
				model_df_Beta_KLD["Citation_Type"] = citation_type
				model_df_Beta_KLD["Journal_Censoring"] = journal_censored
				model_df_Beta_KLD = pd.merge(model_df_Beta_KLD,Number_of_Field_Documents_df,on=["Year"],how="left")
				model_df_Beta_KLD_Final = model_df_Beta_KLD_Final.append(model_df_Beta_KLD)

			if (os.path.exists(KLD_citation_df_Filename)==False) or (os.path.exists(Delta_df_Filename)==False) or (os.path.exists(Degree_df_Filename)==False):

				####################################
				##### Delta | Combine Cosine and Citation Edgelist
				####################################
				# Updated 2021 09 10
				# Citation is j (Receiver) to i (Sender) so KLD (set up as i to j) needs to be reversed. 
				# Because the Citation Receiver j is the KLD sender i. 

				# NOTE THIS NEED TO BE A LEFT JOIN TO THE KLD EDGELIST
				# WHY - Lots of superfluous citations that don't exist in the KLD
				# So, will have NULL KLDs that become ZERO and skew the field centering. 
				# IOW, KLD is our basis not citations because that's what we're comparing it to! 
				KLD_citation_df = pd.merge(KLD_edgelist_df.rename(columns={"Sender":"Receiver","Receiver":"Sender"}),citation_df,on=["Sender","Receiver","Year"],how="left").fillna(0).query("Sender!='common' & Receiver!='common'") #Left Join INTO KLD
				KLD_citation_df = pd.merge(KLD_citation_df,KLD_edgelist_df.drop(columns=["KLD","Vocas_Size"]).rename(columns={"KLD_CenterNormed":"KLD_CenterNormed_T"}),on=["Sender","Receiver","Year"],how="left").fillna(0).query("Sender!='common' & Receiver!='common'")
				KLD_citation_df = pd.merge(KLD_citation_df,citation_df.rename(columns={"Sender":"Receiver","Receiver":"Sender","Citations_ji":"Citations_ij"}),on=["Sender","Receiver","Year","Discipline"],how="left").fillna(0).query("Sender!='common' & Receiver!='common'")
				
				KLD_citation_df = pd.merge(KLD_citation_df,num_papers_df.query("Number_of_Papers>1").rename(columns={"Nation":"Sender"}),on=["Sender","Year"],how="left")
				KLD_citation_df = pd.merge(KLD_citation_df,num_papers_df.query("Number_of_Papers>1").rename(columns={"Nation":"Receiver"}),on=["Receiver","Year"],how="left",suffixes=("_i","_j"))
				KLD_citation_df = KLD_citation_df.fillna(0)

				#####################
				###### Delta
				#####################
				# Updated 2021 09 05
				# i Sender and j Receiver				
				KLD_citation_df["Citations_ij_CenterNormed"] = KLD_citation_df.groupby('Year')[['Citations_ij']].transform(lambda x: (x-x.mean())/x.std())
				KLD_citation_df["Delta_Citation_ij"] = KLD_citation_df["Citations_ij_CenterNormed"] - KLD_citation_df["KLD_CenterNormed_T"]			

				KLD_citation_df["Citations_ji_CenterNormed"] = KLD_citation_df.groupby('Year')[['Citations_ji']].transform(lambda x: (x-x.mean())/x.std())
				KLD_citation_df["Delta_Citation_ji"] = KLD_citation_df["Citations_ji_CenterNormed"] - KLD_citation_df["KLD_CenterNormed"]	


				# FINAL KLD CITAITON DF
				KLD_citation_df["Percentile_Cutoff"] = percentile_cutoff_
				KLD_citation_df["Citation_Window"] = citation_window
				KLD_citation_df["Citation_Type"] = citation_type
				KLD_citation_df["Journal_Censoring"] = journal_censored
				KLD_citation_df = pd.merge(KLD_citation_df,Number_of_Field_Documents_df,on=["Year"],how="left")
				KLD_citation_df_Final = KLD_citation_df_Final.append(KLD_citation_df) 

				####################################
				##### Degree Centrality of Countries
				####################################

				Degree_df = pd.DataFrame()
				for year_ in range(KLD_citation_df.Year.min(),KLD_citation_df.Year.max()+1):
					try:
						# KLD | Out 
						G_OutDegree_KLD = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","KLD_CenterNormed":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						KLD_OutDegree_Year = dict(list(G_OutDegree_KLD.out_degree(weight="weight")))
						KLD_OutDegree_Year = {country:degree/(len(KLD_OutDegree_Year)-1) for country, degree in KLD_OutDegree_Year.items()}
						KLD_OutDegree_Year_df = pd.DataFrame.from_dict(KLD_OutDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"KLD_OutDegree_Centrality"})
						KLD_OutDegree_Year_df["Year"] = year_


						# KLD_T | Out 
						G_OutDegree_KLD_T = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","KLD_CenterNormed_T":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						KLD_T_OutDegree_Year = dict(list(G_OutDegree_KLD_T.out_degree(weight="weight")))
						KLD_T_OutDegree_Year = {country:degree/(len(KLD_T_OutDegree_Year)-1) for country, degree in KLD_T_OutDegree_Year.items()}
						KLD_T_OutDegree_Year_df = pd.DataFrame.from_dict(KLD_T_OutDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"KLD_T_OutDegree_Centrality"})
						KLD_T_OutDegree_Year_df["Year"] = year_

						# KLD | In 
						G_InDegree_KLD = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","KLD_CenterNormed":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						KLD_InDegree_Year = dict(list(G_InDegree_KLD.in_degree(weight="weight")))
						KLD_InDegree_Year = {country:degree/(len(KLD_InDegree_Year)-1) for country, degree in KLD_InDegree_Year.items()}
						KLD_InDegree_Year_df = pd.DataFrame.from_dict(KLD_InDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"KLD_InDegree_Centrality"})
						KLD_InDegree_Year_df["Year"] = year_

						# KLD_T | In 
						G_InDegree_KLD_T = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","KLD_CenterNormed_T":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						KLD_T_InDegree_Year = dict(list(G_InDegree_KLD_T.in_degree(weight="weight")))
						KLD_T_InDegree_Year = {country:degree/(len(KLD_T_InDegree_Year)-1) for country, degree in KLD_T_InDegree_Year.items()}
						KLD_T_InDegree_Year_df = pd.DataFrame.from_dict(KLD_T_InDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"KLD_T_InDegree_Centrality"})
						KLD_T_InDegree_Year_df["Year"] = year_

						# Citation ji | Out
						G_OutDegree_Citation_ji = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Citations_ji":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						Citation_ji_OutDegree_Year = dict(list(G_OutDegree_Citation_ji.out_degree(weight="weight")))
						Citation_ji_OutDegree_Year = {country:degree/(len(Citation_ji_OutDegree_Year)-1) for country, degree in Citation_ji_OutDegree_Year.items()}
						Citation_ji_OutDegree_Year = pd.DataFrame.from_dict(Citation_ji_OutDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"Citation_ji_OutDegree_Centrality"})
						Citation_ji_OutDegree_Year["Year"] = year_

						# Citation ji | In
						G_InDegree_Citation_ji = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Citations_ji":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						Citation_ji_InDegree_Year = dict(list(G_InDegree_Citation_ji.in_degree(weight="weight")))
						Citation_ji_InDegree_Year = {country:degree/(len(Citation_ji_InDegree_Year)-1) for country, degree in Citation_ji_InDegree_Year.items()}
						Citation_ji_InDegree_Year = pd.DataFrame.from_dict(Citation_ji_InDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"Citation_ji_InDegree_Centrality"})
						Citation_ji_InDegree_Year["Year"] = year_

						# Citation ij | Out
						G_OutDegree_Citation_ij = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Citations_ij":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						Citation_ij_OutDegree_Year = dict(list(G_OutDegree_Citation_ij.out_degree(weight="weight")))
						Citation_ij_OutDegree_Year = {country:degree/(len(Citation_ij_OutDegree_Year)-1) for country, degree in Citation_ij_OutDegree_Year.items()}
						Citation_ij_OutDegree_Year = pd.DataFrame.from_dict(Citation_ij_OutDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"Citation_ij_OutDegree_Centrality"})
						Citation_ij_OutDegree_Year["Year"] = year_

						# Citation ij | In
						G_InDegree_Citation_ij = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Citations_ij":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						Citation_ij_InDegree_Year = dict(list(G_InDegree_Citation_ij.in_degree(weight="weight")))
						Citation_ij_InDegree_Year = {country:degree/(len(Citation_ij_InDegree_Year)-1) for country, degree in Citation_ij_InDegree_Year.items()}
						Citation_ij_InDegree_Year = pd.DataFrame.from_dict(Citation_ij_InDegree_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"Citation_ij_InDegree_Centrality"})
						Citation_ij_InDegree_Year["Year"] = year_


						# Merge
						Degree_Year = pd.merge(Citation_ji_OutDegree_Year,
							Citation_ji_InDegree_Year,on=["Country","Year"],how="outer")
						Degree_Year = pd.merge(Degree_Year,
							KLD_OutDegree_Year_df,on=["Country","Year"],how="outer")
						Degree_Year = pd.merge(Degree_Year,
							KLD_InDegree_Year_df,on=["Country","Year"],how="outer")

						Degree_Year = pd.merge(Degree_Year,
							KLD_T_OutDegree_Year_df,on=["Country","Year"],how="outer")
						Degree_Year = pd.merge(Degree_Year,
							KLD_T_InDegree_Year_df,on=["Country","Year"],how="outer")

						Degree_Year = pd.merge(Degree_Year,
							Citation_ij_OutDegree_Year,on=["Country","Year"],how="outer")
						Degree_Year = pd.merge(Degree_Year,
							Citation_ij_InDegree_Year,on=["Country","Year"],how="outer")

						Degree_df = Degree_df.append(Degree_Year)

					except:
						continue 

				Degree_df = pd.merge(Degree_df,num_papers_df.rename(columns={"Nation":"Country"}),on=["Country","Year"],how="left")

				Degree_df["Discipline"] = discipline
				Degree_df["Percentile_Cutoff"] = percentile_cutoff_
				Degree_df["Citation_Window"] = citation_window
				Degree_df["Citation_Type"] = citation_type
				Degree_df["Journal_Censoring"] = journal_censored
				Degree_df = pd.merge(Degree_df,Number_of_Field_Documents_df,on=["Year"],how="left")
				Degree_df_Final = Degree_df_Final.append(Degree_df)

				####################################
				##### Delta
				####################################
				# Delta_ij = Citation_ij - KLD_T
				# Delta_ji = Citation_ji - KLD
				OutDelta_Citation_ij_df = pd.DataFrame()
				InDelta_Citation_ij_df = pd.DataFrame()
				OutDelta_Citation_ji_df = pd.DataFrame()
				InDelta_Citation_ji_df = pd.DataFrame()

				for year_ in range(KLD_citation_df.Year.min(),KLD_citation_df.Year.max()+1):
					try:
						# Out Degree | Delta ij = Citation ij - KLD_T 
						G_OutDelta_Citation_ij_Year = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Delta_Citation_ij":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						OutDelta_Citation_ij_Year = dict(list(G_OutDelta_Citation_ij_Year.out_degree(weight="weight")))
						OutDelta_Citation_ij_Year = {country:degree/(len(OutDelta_Citation_ij_Year)-1) for country, degree in OutDelta_Citation_ij_Year.items()}
						OutDelta_Citation_ij_Year = pd.DataFrame.from_dict(OutDelta_Citation_ij_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"OutDelta_Citation_ij"})
						OutDelta_Citation_ij_Year["Year"] = year_
						OutDelta_Citation_ij_df = OutDelta_Citation_ij_df.append(OutDelta_Citation_ij_Year)

						# In Degree | Delta ij = Citation ij - KLD_T 
						G_InDelta_Citation_ij_Year = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Delta_Citation_ij":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						InDelta_Citation_ij_Year = dict(list(G_InDelta_Citation_ij_Year.in_degree(weight="weight")))
						InDelta_Citation_ij_Year = {country:degree/(len(InDelta_Citation_ij_Year)-1) for country, degree in InDelta_Citation_ij_Year.items()}
						InDelta_Citation_ij_Year = pd.DataFrame.from_dict(InDelta_Citation_ij_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"InDelta_Citation_ij"})
						InDelta_Citation_ij_Year["Year"] = year_
						InDelta_Citation_ij_df = InDelta_Citation_ij_df.append(InDelta_Citation_ij_Year)

						# Out Degree | Delta ji = Citation ji - KLD 
						G_OutDelta_Citation_ji_Year = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Delta_Citation_ji":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						OutDelta_Citation_ji_Year = dict(list(G_OutDelta_Citation_ji_Year.out_degree(weight="weight")))
						OutDelta_Citation_ji_Year = {country:degree/(len(OutDelta_Citation_ji_Year)-1) for country, degree in OutDelta_Citation_ji_Year.items()}
						OutDelta_Citation_ji_Year = pd.DataFrame.from_dict(OutDelta_Citation_ji_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"OutDelta_Citation_ji"})
						OutDelta_Citation_ji_Year["Year"] = year_
						OutDelta_Citation_ji_df = OutDelta_Citation_ji_df.append(OutDelta_Citation_ji_Year)

						# In Degree | Delta ji = Citation ji - KLD 
						G_InDelta_Citation_ji_Year = nx.from_pandas_edgelist(KLD_citation_df.query("Year==@year_").rename(columns={"Sender":"source","Receiver":"target","Delta_Citation_ji":"weight"})[["source","target","weight"]],edge_attr=True,create_using=nx.DiGraph())
						InDelta_Citation_ji_Year = dict(list(G_InDelta_Citation_ji_Year.in_degree(weight="weight")))
						InDelta_Citation_ji_Year = {country:degree/(len(InDelta_Citation_ji_Year)-1) for country, degree in InDelta_Citation_ji_Year.items()}
						InDelta_Citation_ji_Year = pd.DataFrame.from_dict(InDelta_Citation_ji_Year,orient="index").reset_index().rename(columns={"index":"Country",0:"InDelta_Citation_ji"})
						InDelta_Citation_ji_Year["Year"] = year_
						InDelta_Citation_ji_df = InDelta_Citation_ji_df.append(InDelta_Citation_ji_Year)


					except:
						continue 

				Delta_df = pd.merge(OutDelta_Citation_ij_df,InDelta_Citation_ij_df,on=["Year","Country"])
				Delta_df = pd.merge(Delta_df,OutDelta_Citation_ji_df,on=["Year","Country"])
				Delta_df = pd.merge(Delta_df,InDelta_Citation_ji_df,on=["Year","Country"])

				Delta_df["Discipline"] = discipline
				Delta_df["Percentile_Cutoff"] = percentile_cutoff_
				Delta_df["Citation_Window"] = citation_window
				Delta_df["Citation_Type"] = citation_type
				Delta_df["Journal_Censoring"] = journal_censored
				Delta_df = pd.merge(Delta_df,Number_of_Field_Documents_df,on=["Year"],how="left")
				Delta_df = pd.merge(Delta_df,num_papers_df.rename(columns={"Nation":"Country"}),on=["Country","Year"],how="left")
				Delta_df_Final = Delta_df_Final.append(Delta_df)

#################
####### Output
#################

if ((os.path.exists(model_df_MRQAP_KLD_Filename)==False)): 
	try:
		model_df_Beta_KLD_Final.to_csv(model_df_MRQAP_KLD_Filename,compression="gzip",index=False)
	except Exception as e:
		print(e)

if ( (os.path.exists(KLD_citation_df_Filename)==False) ):
	try:
		KLD_citation_df_Final.to_csv(KLD_citation_df_Filename,compression="gzip", index=False)
	except Exception as e:
		print(e)

if ((os.path.exists(Delta_df_Filename)==False) ):
	try:
		Delta_df_Final.to_csv(Delta_df_Filename,compression="gzip", index=False)
	except Exception as e:
		print(e)

if ( (os.path.exists(Degree_df_Filename)==False)):
	try:
		Degree_df_Final.to_csv(Degree_df_Filename,compression="gzip", index=False)
	except Exception as e:
		print(e)
